### WISCONSIN 2008 EXPERIMENT
### Dan Hopkins
### Replication code: 2/24/2016

### Load libraries
library(foreign)
library(xtable)
library(texreg)
library(mice)
library(Zelig)
library(MASS)
library(sampleSelection)

### Set working directory
setwd("C:/Users/danhop/Dropbox/WI 2008/")

### load data set (with proprietary variables)
dta <- read.dta("wi-data-11072016.dta")

dta$black <- 1*(dta$q_race=="black")
dta$hispanic <- 1*(dta$q_race=="hispanic" )
dta$male <- 1*(dta$q_gender=="male")
dta$survey <- 1*(! dta$dv_sen %in% c(NA))

##### Subsets of data

dta$mccain <- 1*(dta$dv_sen=="McCain")
dta$obama <- 1*(dta$dv_sen=="Obama")
dta$undecided <- 1*(dta$dv_sen=="Undecided")

qt <- quantile(dta$q_supportscore,c(0,.2,.4,.6,.8,1),na.rm=T)
dta$support.score.c <- cut(dta$q_supportscore,qt,labels=c(1:5))

xx <- subset(dta,select=c("vh_00g","vh_00p","vh_02g","vh_02p","vh_04g","vh_04p","vh_06g","vh_06p","vh_08p"))

dta$turnout.score <- apply(xx,1,mean)
dta$turnout.score.c <- NA
dta$turnout.score.c[dta$turnout.score==0] <- 0
dta$turnout.score.c[dta$turnout.score==1/9] <- 1
dta$turnout.score.c[dta$turnout.score==2/9] <- 2
dta$turnout.score.c[dta$turnout.score==3/9] <- 3
dta$turnout.score.c[dta$turnout.score==4/9] <- 4
dta$turnout.score.c[dta$turnout.score==5/9] <- 5
dta$turnout.score.c[dta$turnout.score==6/9] <- 6
dta$turnout.score.c[dta$turnout.score==7/9] <- 7
dta$turnout.score.c[dta$turnout.score==8/9] <- 8
dta$turnout.score.c[dta$turnout.score==9/9] <- 9

dta$protestant <- 1*(dta$q_religion=="Protestant")
dta$catholic <- 1*(dta$q_religion=="Catholic")

dta$q_medianhhincomet <- dta$q_medianhhincome/1000

dta$treat000 <- 0
dta$treat000[dta$treat=="c0m0p0"] <- 1

dta$treat001 <- 0
dta$treat001[dta$treat=="c0m0p1"] <- 1

dta$treat100 <- 0
dta$treat100[dta$treat=="c1m0p0"] <- 1

dta$treat101 <- 0
dta$treat101[dta$treat=="c1m0p1"] <- 1

dta$treat010 <- 0
dta$treat010[dta$treat=="c0m1p0"] <- 1

dta$treat011 <- 0
dta$treat011[dta$treat=="c0m1p1"] <- 1

dta$treat110 <- 0
dta$treat110[dta$treat=="c1m1p0"] <- 1

dta$treat111 <- 0
dta$treat111[dta$treat=="c1m1p1"] <- 1

#### CREATE INDICATOR VARIABLES
for(i in 1:9){

	txt1 <- paste("dta$turnout.score.",i,"<- 0",sep="")
	eval(parse(text=txt1))
	txt15 <- paste("dta$turnout.score.",i,"<- 1*(dta$turnout.score.c==",i,")",sep="")
	eval(parse(text=txt15))
	txt2 <- paste("dta$canvass.i.score.",i,"<- 0",sep="")
	eval(parse(text=txt2))
	txt16 <- paste("dta$canvass.i.score.",i,"<- 1*(dta$turnout.score.c==",i," & (dta$canvass==1))",sep="")
	eval(parse(text=txt16))
	txt3 <- paste("dta$phonecall.i.score.",i,"<- 0",sep="")
	eval(parse(text=txt3))
	txt17 <- paste("dta$phonecall.i.score.",i,"<- 1*(dta$turnout.score.c==",i,"& (dta$phonecall==1))",sep="")
	eval(parse(text=txt17))	
	
}



dta.sv20 <- dta[! dta$q_phonematchscore=="",]

#### subset variables for public availability
dta.sub <- subset(dta,select=c("survey","svy_result","obama","canvass","phonecall","mail","black","hispanic","turnout.score.c",
"male","protestant","catholic","q_age","vh_02g","vh_04p","vh_04g","vh_06p","vh_06g","vh_08p","vh_08g","q_phonematchscore"))

save(dta.sub,file="wi-08-public-data.Rdata")


##### subset by survey results
dta.sv <- dta[! dta$dv_sen %in% c(NA),]
dta.sv2 <- dta[! dta$svy_result %in% c(""),]
dta.sv3 <- dta[! dta$svy_result %in% c("","20 DeclinedToParticipate"),]
dta.sv4 <- dta[! dta$svy_result %in% c("","20 DeclinedToParticipate","24 Already voted"),]
#dta.sv5 <- dta[! dta$svy_result %in% c("","21 Do not call","92 Invalid","90 Not in service","86 Tri-Tone","35 Privacy Manager"),]
dta.sv6 <- dta[! dta$svy_result %in% c("80 Wrong number","31 Language barrier","32 Deceased","","21 Do not call","92 Invalid","90 Not in service","86 Tri-Tone","35 Privacy Manager"),]
dta.sv7 <- dta[! dta$svy_result %in% c("20 DeclinedToParticipate","80 Wrong number","31 Language barrier","32 Deceased","","21 Do not call","92 Invalid","90 Not in service","86 Tri-Tone","35 Privacy Manager"),]
dta.sv8 <- dta[! dta$svy_result %in% c("24 Already voted","20 DeclinedToParticipate","80 Wrong number","31 Language barrier","32 Deceased","","21 Do not call","92 Invalid","90 Not in service","86 Tri-Tone","35 Privacy Manager"),]
dta.sv9 <- dta[! dta$svy_result %in% c("30 Early hangup","24 Already voted","20 DeclinedToParticipate","80 Wrong number","31 Language barrier","32 Deceased","","21 Do not call","92 Invalid","90 Not in service","86 Tri-Tone","35 Privacy Manager"),]
dta.sv10 <- dta[! dta$svy_result %in% c("04 Refused","30 Early hangup","24 Already voted","20 DeclinedToParticipate","80 Wrong number","31 Language barrier","32 Deceased","","21 Do not call","92 Invalid","90 Not in service","86 Tri-Tone","35 Privacy Manager"),]
dta.sv10a <- dta[dta$svy_result %in% c("04 Refused"),]

dta.sv11 <- dta[! dta$obama %in% c(NA),]

dta.sv5 <- dta[! dta$svy_result %in% c("","21 Do not call","92 Invalid","90 Not in service","86 Tri-Tone","35 Privacy Manager"),]

#### omnibus tests -- balance

lout.canvass <- lm(canvass ~  I(q_supportscore/100)+as.factor(turnout.score.c)+male+I(ncec_dem_perf/100)+black+hispanic+protestant+catholic+I(q_medianhhincome/100)+I(q_percentcollegegrads/100),data=dta)

lout.canvass.svy <- lm(canvass ~  I(q_supportscore/100)+as.factor(turnout.score.c)+male+I(ncec_dem_perf/100)+black+hispanic+protestant+catholic+I(q_medianhhincome/100)+I(q_percentcollegegrads/100),data=dta.sv11)

texreg(list(lout.canvass,lout.canvass.svy),stars=0.05,digits=3)

### Table 1, balance for those who complete phone survey

ivs <- c("q_age","black","male","hispanic","vh_02g","vh_04p","vh_04g","vh_06p","vh_06g","vh_08p","turnout.score.c","q_supportscore","catholic","protestant","q_pres04d","ncec_dem_perf","q_medianhhincomet","q_percentsingleparents","q_percentinpoverty","q_percentcollegegrads","q_percenthomeowners","q_percenturban","q_percentwhitecollar","q_percentunemployed","q_percenthispanic","q_percentasian","q_percentafricanamerican","q_percent65andolder")

rmatc <- matrix(NA,length(ivs),8)
rownames(rmatc) <- ivs
colnames(rmatc) <- c("Mean, C=1","Mean, C=0","P-value","Mean, C=1","Mean, C=2","P-value","N","N.survey")

for(i in 1:length(ivs)){
	txt <- paste("tout <- t.test(dta$",ivs[i],"[dta$canvass==1],dta$",ivs[i],"[dta$canvass==0])",sep="")
	eval(parse(text=txt))
	rmatc[i,1] <- tout$estimate[1]
	rmatc[i,2] <- tout$estimate[2]
	rmatc[i,3] <- tout$p.value

	txt <- paste("tout2 <- t.test(dta.sv11$",ivs[i],"[dta.sv11$canvass==1],dta.sv11$",ivs[i],"[dta.sv11$canvass==0])",sep="")
	eval(parse(text=txt))

	rmatc[i,4] <- tout2$estimate[1]
	rmatc[i,5] <- tout2$estimate[2]
	rmatc[i,6] <- tout2$p.value

	txt3 <- paste("hold <- length(na.omit(dta$",ivs[i],"))",sep="")
	eval(parse(text=txt3))
	rmatc[i,7] <- hold

	txt4 <- paste("hold <- length(na.omit(dta.sv11$",ivs[i],"))",sep="")
	eval(parse(text=txt4))
	rmatc[i,8] <- hold


}
round(rmatc,digits=3)
library(xtable)

### appendix table A2
xtable(rmatc[,c(1:3,7)],digits=c(0,rep(3,3),0))

### table 1
xtable(rmatc[,c(4:6,8)],digits=c(0,3,3,3,0))


#### appendix table A2, balance in random assignment

ivs <- c("q_age","black","male","hispanic","vh_02g","vh_04p","vh_04g","vh_06p","vh_06g","vh_08p","turnout.score.c","q_supportscore","catholic","protestant","q_pres04d","ncec_dem_perf","q_medianhhincomet","q_percentsingleparents","q_percentinpoverty","q_percentcollegegrads","q_percenthomeowners","q_percenturban","q_percentwhitecollar","q_percentunemployed","q_percenthispanic","q_percentasian","q_percentafricanamerican","q_percent65andolder")

rmatc <- matrix(NA,length(ivs),8)
rownames(rmatc) <- ivs
colnames(rmatc) <- c("Mean, C=1","Mean, C=0","P-value","Mean, C=1","Mean, C=2","P-value","N","N.survey")

for(i in 1:length(ivs)){
	txt <- paste("tout <- t.test(dta$",ivs[i],"[dta$canvass==1],dta$",ivs[i],"[dta$canvass==0])",sep="")
	eval(parse(text=txt))
	rmatc[i,1] <- tout$estimate[1]
	rmatc[i,2] <- tout$estimate[2]
	rmatc[i,3] <- tout$p.value

	txt <- paste("tout2 <- t.test(dta.sv11$",ivs[i],"[dta.sv11$canvass==1],dta.sv11$",ivs[i],"[dta.sv11$canvass==0])",sep="")
	eval(parse(text=txt))

	rmatc[i,4] <- tout2$estimate[1]
	rmatc[i,5] <- tout2$estimate[2]
	rmatc[i,6] <- tout2$p.value

	txt3 <- paste("hold <- length(na.omit(dta$",ivs[i],"))",sep="")
	eval(parse(text=txt3))
	rmatc[i,7] <- hold

	txt4 <- paste("hold <- length(na.omit(dta.sv11$",ivs[i],"))",sep="")
	eval(parse(text=txt4))
	rmatc[i,8] <- hold


}
round(rmatc,digits=3)
library(xtable)

### Appendix Table A2
xtable(rmatc[,c(1:3,7)],digits=c(0,rep(3,3),0))
#xtable(rmatc[,c(4:6,8)],digits=c(0,3,3,3,0))

### overall response rate by canvassing assignment
table(dta$survey,dta$canvass)

###### appendix table A3, phone treatment and mail treatment
####

ivs <- c("q_age","black","male","hispanic","vh_02g","vh_04p","vh_04g","vh_06p","vh_06g","vh_08p","turnout.score.c","q_supportscore","catholic","protestant","q_pres04d","ncec_dem_perf","q_medianhhincomet","q_percentsingleparents","q_percentinpoverty","q_percentcollegegrads","q_percenthomeowners","q_percenturban","q_percentwhitecollar","q_percentunemployed","q_percenthispanic","q_percentasian","q_percentafricanamerican","q_percent65andolder")

rmatp <- matrix(NA,length(ivs),7)
rownames(rmatp) <- ivs
colnames(rmatp) <- c("Mean, P=1","Mean, P=0","P-value","Mean, M=1","Mean, M=0","P-value","N")

for(i in 1:length(ivs)){
	txt <- paste("tout <- t.test(dta.sv11$",ivs[i],"[dta.sv11$phonecall==1],dta.sv11$",ivs[i],"[dta.sv11$phonecall==0])",sep="")
	eval(parse(text=txt))
	rmatp[i,1] <- tout$estimate[1]
	rmatp[i,2] <- tout$estimate[2]
	rmatp[i,3] <- tout$p.value

	txt <- paste("tout2 <- t.test(dta.sv11$",ivs[i],"[dta.sv11$mail==1],dta.sv11$",ivs[i],"[dta.sv11$mail==0])",sep="")
	eval(parse(text=txt))

	rmatp[i,4] <- tout2$estimate[1]
	rmatp[i,5] <- tout2$estimate[2]
	rmatp[i,6] <- tout2$p.value

	txt3 <- paste("hold <- length(na.omit(dta.sv11$",ivs[i],"))",sep="")
	eval(parse(text=txt3))
	rmatp[i,7] <- hold

}
round(rmatp,digits=3)

#### Appendix Table A3
xtable(rmatp[,1:6],digits=c(0,rep(3,6)))


#### F tests

glout.survey <- glm(survey ~  canvass+phonecall+mail+q_supportscore+turnout.score.c+male+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9,data=dta)

mf <- model.frame(glout.survey)

glout1<- lm(survey ~  canvass+phonecall+mail+q_supportscore+turnout.score.c+male+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9,data=mf)

glout2 <- lm(survey ~  1,data=mf)

anova(glout2,glout1)

#### Table 2
dataset <- c("dta","dta.sv2","dta.sv5","dta.sv7","dta.sv11")

rmat30 <- rmat20 <- rmat10 <- matrix(NA,length(dataset),5)

for(i in 1:length(dataset)){

	txt1 <- paste("dta.sub <- ",dataset[i])
	eval(parse(text=txt1))
	tout1 <- t.test(dta.sub$turnout.score[dta.sub$canvass==1],dta.sub$turnout.score[dta.sub$canvass==0])

	rmat10[i,1] <- tout1$estimate[1]
	rmat10[i,2] <- tout1$estimate[2]
	rmat10[i,3] <- tout1$estimate[1]-tout1$estimate[2]
	rmat10[i,4] <- tout1$p.value
	rmat10[i,5] <- dim(dta.sub)[1]

	tout2 <- t.test(dta.sub$turnout.score[dta.sub$phonecall==1],dta.sub$turnout.score[dta.sub$phonecall==0])

	rmat20[i,1] <- tout2$estimate[1]
	rmat20[i,2] <- tout2$estimate[2]
	rmat20[i,3] <- tout2$estimate[1]-tout2$estimate[2]
	rmat20[i,4] <- tout2$p.value
	rmat20[i,5] <- dim(dta.sub)[1]

	tout3 <- t.test(dta.sub$turnout.score[dta.sub$mail==1],dta.sub$turnout.score[dta.sub$mail==0])

	rmat30[i,1] <- tout3$estimate[1]
	rmat30[i,2] <- tout3$estimate[2]
	rmat30[i,3] <- tout3$estimate[1]-tout3$estimate[2]
	rmat30[i,4] <- tout3$p.value
	rmat30[i,5] <- dim(dta.sub)[1]
}
library(xtable)

### stages of non-response: canvass
xtable(rmat10,digits=c(0,3,3,3,3,0))

xtable(rmat20,digits=c(0,3,3,3,3,0))
xtable(rmat30,digits=c(0,3,3,3,3,0))

#### FIGURE 1

ConfIntervals    = matrix(NA, 10, 4)
for(ii in 1:length(unique(dta$turnout.score.c))){
    turn <- sort(unique(dta$turnout.score.c))[ii]
    zzx     = lm(dta$survey[dta$turnout.score.c==turn]~dta$canvass[dta$turnout.score.c==turn])
    summary(zzx)
    ConfIntervals[ii, 1]         = coefficients(zzx)[2]
    ConfIntervals[ii, 2:3]     = confint(zzx)[2, 1:2]
    ConfIntervals[ii, 4]         = length(resid(zzx))
}
plot(0:9, c(min(ConfIntervals[,2:3]), max(ConfIntervals[,2:3]), rep(0, 8)), type="n", xlab = "", ylab = "", xaxt='n', yaxt='n')
    axis(1, at = seq(0, 9,by=1), labels =  seq(0, 9,by=1), tick = T,
        cex.axis = .8, mgp = c(2,.4,0))
    axis(2, tick = T, cex.axis = .8, mgp = c(2,.7,0))
    mtext("Prior turnout level",             side = 1, line = 1.7, cex = 0.9)
    mtext("Effect of canvass on survey response rate",     side = 2, line = 2.2, cex = 0.9)
for(ii in 1:length(unique(dta$turnout.score.c))){
    lines(c(ii-1, ii-1), ConfIntervals[ii,2:3])
    lines(c(ii-1.06, ii-0.94), c(ConfIntervals[ii,2], ConfIntervals[ii,2]))
    lines(c(ii-1.06, ii-0.94), c(ConfIntervals[ii,3], ConfIntervals[ii,3]))
    abline(h = 0, lty = 3, col = "grey") # add horiontal line
    #points(ii-1, ConfIntervals[ii,1], pch = 16)
    points(ii-1, ConfIntervals[ii,1], pch = 16, cex = 15*ConfIntervals[ii,4]/sum(ConfIntervals[,4]))
}




### Table 3

all.sub <- lm(vh_08g ~ canvass + phonecall+mail,data=dta)
svy.sub <- lm(vh_08g ~ canvass + phonecall+mail,data=dta.sv11)

texreg(list(all.sub,svy.sub),digits=3,stars=0.05)


###
### multiple imputation

interactions <- c("canvass.i.score.1","phonecall.i.score.1","canvass.i.score.2", "phonecall.i.score.2","canvass.i.score.3","phonecall.i.score.3","canvass.i.score.4","phonecall.i.score.4","canvass.i.score.5","phonecall.i.score.5","canvass.i.score.6","phonecall.i.score.6","canvass.i.score.7","phonecall.i.score.7","canvass.i.score.8","phonecall.i.score.8","canvass.i.score.9","phonecall.i.score.9")

cn <- c("obama","mccain","canvass","phonecall","mail","q_supportscore","turnout.score.c","male","q_age","ncec_dem_perf","black","hispanic","protestant","catholic","q_medianhhincome","q_percentcollegegrads")
dta.sub <- subset(dta,select=c(cn,interactions))

m1 <- mice(dta.sub)

load("mi-dta-03012016.Rdata")

n1 <- complete(m1,action=1)
n2 <- complete(m1,action=2)
n3 <- complete(m1,action=3)
n4 <- complete(m1,action=4)
n5 <- complete(m1,action=5)

n1$canvass <- n2$canvass <- n3$canvass <- n4$canvass <- n5$canvass <- dta$canvass
n1$phonecall <- n2$phonecall <- n3$phonecall <- n4$phonecall <- n5$phonecall <- dta$phonecall
n1$mail <- n2$mail <- n3$mail <- n4$mail <- n5$mail <- dta$mail

#### linear probability

lout1 <- lm(obama ~  canvass+phonecall+mail+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n1)
lout2 <- lm(obama ~  canvass+phonecall+mail+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n1)
lout3 <- lm(obama ~  canvass+phonecall+mail+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n1)
lout4 <- lm(obama ~  canvass+phonecall+mail+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n1)
lout5 <- lm(obama ~  canvass+phonecall+mail+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n1)



#### listwise deletion
glout.ld <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=dta,family=binomial(link="logit"))

glout1 <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n1,family=binomial(link="logit"))
glout2 <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n2,family=binomial(link="logit"))
glout3 <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n3,family=binomial(link="logit"))
glout4 <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n4,family=binomial(link="logit"))
glout5 <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n5,family=binomial(link="logit"))

mf1 <- model.frame(glout1)
mf.vec.c <- mf.vec.t <- apply(mf1,2,median)

pro.vec.all <- table(n1$turnout.score.c)/dim(n1)[1]
pro.vec <- pro.vec.all[2:10]

mf.vec.t[2] <- 1
mf.vec.c[2] <- 0

mf.vec.t[3] <- 0
mf.vec.c[3] <- 0


mf.vec.t[names(mf.vec.t) %in% c("canvass.i.score.1","canvass.i.score.2","canvass.i.score.3","canvass.i.score.4","canvass.i.score.5","canvass.i.score.6","canvass.i.score.7","canvass.i.score.8","canvass.i.score.9")] <- pro.vec
mf.vec.c[names(mf.vec.c) %in% c("canvass.i.score.1","canvass.i.score.2","canvass.i.score.3","canvass.i.score.4","canvass.i.score.5","canvass.i.score.6","canvass.i.score.7","canvass.i.score.8","canvass.i.score.9")] <- rep(0,9)

M <- 10000
set.seed(11201)
betas.ld <- mvrnorm(M,mu=summary(glout.ld)$coef[,1],Sigma=vcov(glout.ld))
sims.tld <- 1/(1+exp(-betas.ld%*%mf.vec.t))
sims.cld <- 1/(1+exp(-betas.ld%*%mf.vec.c))
fd.ld <- sims.tld-sims.cld
quantile(fd.ld,c(0.025,.5,.975))
#        2.5%          50%        97.5% 
#-0.036440648 -0.016103807  0.004647237 


#### one-sided p-value
sum(fd.ld > 0)/M

#### two-sided p-value
(sum(fd.ld > 0)/M)*2
#[1] 0.1336


#### check via Zelig
glout.ld.z <- zelig(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=dta,model="logit")

pro.vec <- table(n1$turnout.score.c)/dim(n1)[1]
pro.zero <- rep(0,10)

x1 <- setx(glout.ld.z,canvass=1,canvass.i.score.1=pro.vec[2],canvass.i.score.2=pro.vec[3],canvass.i.score.3=pro.vec[4],
canvass.i.score.4=pro.vec[5],canvass.i.score.5=pro.vec[6],canvass.i.score.6=pro.vec[7],canvass.i.score.7=pro.vec[8],canvass.i.score.8=pro.vec[9],canvass.i.score.9=pro.vec[10])

x0 <- setx(glout.ld.z,canvass=0,canvass.i.score.1=pro.zero[2],canvass.i.score.2=pro.zero[3],canvass.i.score.3=pro.zero[4],
canvass.i.score.4=pro.zero[5],canvass.i.score.5=pro.zero[6],canvass.i.score.6=pro.zero[7],canvass.i.score.7=pro.zero[8],canvass.i.score.8=pro.zero[9],canvass.i.score.9=pro.zero[10])

set.seed(21230)
s1c <- sim(glout.ld.z,x=x0,x1=x1,num=10000)
summary(s1c)
#            mean         sd         50%        2.5%       97.5%
#[1,] -0.01643152 0.01098913 -0.01645458 -0.03796217 0.005234386


##########################
#### check imputation ####
##########################

n.sims <- 5
rmat <- matrix(NA,n.sims,1)
set.seed(20007)
for(i in 1:n.sims){

	idx <- which(! dta$obama %in% c(NA))
	remove.obs <- sample(idx,size=500,replace=F)

	dta.hold <- dta
	dta.hold$obama[remove.obs] <- NA

	interactions <- c("canvass.i.score.1","phonecall.i.score.1","canvass.i.score.2", "phonecall.i.score.2","canvass.i.score.3","phonecall.i.score.3","canvass.i.score.4","phonecall.i.score.4","canvass.i.score.5","phonecall.i.score.5","canvass.i.score.6","phonecall.i.score.6","canvass.i.score.7","phonecall.i.score.7","canvass.i.score.8","phonecall.i.score.8","canvass.i.score.9","phonecall.i.score.9")

	cn <- c("obama","mccain","canvass","phonecall","mail","q_supportscore","turnout.score.c","male","q_age","ncec_dem_perf","black","hispanic","protestant","catholic","q_medianhhincome","q_percentcollegegrads")
	dta.sub <- subset(dta.hold,select=c(cn,interactions))

	m1.test <- mice(dta.sub)

	n1t <- complete(m1.test,action=1)
	n2t <- complete(m1.test,action=2)
	n3t <- complete(m1.test,action=3)
	n4t <- complete(m1.test,action=4)
	n5t <- complete(m1.test,action=5)

	m1 <- sum(diag(table(n1t$obama[remove.obs],dta$obama[remove.obs])))/500
	m2 <- sum(diag(table(n2t$obama[remove.obs],dta$obama[remove.obs])))/500
	m3 <- sum(diag(table(n3t$obama[remove.obs],dta$obama[remove.obs])))/500
	m4 <- sum(diag(table(n4t$obama[remove.obs],dta$obama[remove.obs])))/500
	m5 <- sum(diag(table(n5t$obama[remove.obs],dta$obama[remove.obs])))/500
	
	rmat[i,1] <- mean(c(m1,m2,m3,m4,m5))
}

#       [,1]
#[1,] 0.7596
#[2,] 0.7488
#[3,] 0.7440
#[4,] 0.7328
#[5,] 0.7496



n1t$canvass <- n2t$canvass <- n3t$canvass <- n4t$canvass <- n5t$canvass <- dta$canvass
n1t$phonecall <- n2t$phonecall <- n3t$phonecall <- n4t$phonecall <- n5t$phonecall <- dta$phonecall
n1t$mail <- n2t$mail <- n3t$mail <- n4t$mail <- n5t$mail <- dta$mail

glout1t <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n1t,family=binomial(link="logit"))
glout2t <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n2t,family=binomial(link="logit"))
glout3t <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n3t,family=binomial(link="logit"))
glout4t <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n4t,family=binomial(link="logit"))
glout5t <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n5t,family=binomial(link="logit"))








####
set.seed(22201)
betas1 <- mvrnorm(M,mu=summary(glout1)$coef[,1],Sigma=vcov(glout1))
sims.t1 <- 1/(1+exp(-betas1%*%mf.vec.t))
sims.c1 <- 1/(1+exp(-betas1%*%mf.vec.c))
fd1 <- sims.t1-sims.c1
quantile(fd1,c(0.025,.5,.975))

betas2 <- mvrnorm(M,mu=summary(glout2)$coef[,1],Sigma=vcov(glout2))
sims.t2 <- 1/(1+exp(-betas2%*%mf.vec.t))
sims.c2 <- 1/(1+exp(-betas2%*%mf.vec.c))
fd2 <- sims.t2-sims.c2

betas3 <- mvrnorm(M,mu=summary(glout3)$coef[,1],Sigma=vcov(glout3))
sims.t3 <- 1/(1+exp(-betas3%*%mf.vec.t))
sims.c3 <- 1/(1+exp(-betas3%*%mf.vec.c))
fd3 <- sims.t3-sims.c3

betas4 <- mvrnorm(M,mu=summary(glout4)$coef[,1],Sigma=vcov(glout4))
sims.t4 <- 1/(1+exp(-betas4%*%mf.vec.t))
sims.c4 <- 1/(1+exp(-betas4%*%mf.vec.c))
fd4 <- sims.t4-sims.c4

betas5 <- mvrnorm(M,mu=summary(glout5)$coef[,1],Sigma=vcov(glout5))
sims.t5 <- 1/(1+exp(-betas5%*%mf.vec.t))
sims.c5 <- 1/(1+exp(-betas5%*%mf.vec.c))
fd5 <- sims.t5-sims.c5

#### first differences, full data set imputation
fd.tot <- c(fd1,fd2,fd3,fd4,fd5)
quantile(fd.tot,c(0.025,.5,.975))
#        2.5%          50%        97.5% 
#-0.029435959 -0.018799506 -0.007896334 
2*(sum(fd.tot > 0)/M)


#### remove phonematch

n1p <- n1[! dta$q_phonematch=="",] 
n2p <- n2[! dta$q_phonematch=="",] 
n3p <- n3[! dta$q_phonematch=="",] 
n4p <- n4[! dta$q_phonematch=="",] 
n5p <- n5[! dta$q_phonematch=="",] 

glout1p <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n1p,family=binomial(link="logit"))
glout2p <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n2p,family=binomial(link="logit"))
glout3p <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n3p,family=binomial(link="logit"))
glout4p <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n4p,family=binomial(link="logit"))
glout5p <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+phonecall.i.score.2+canvass.i.score.3+phonecall.i.score.3+canvass.i.score.4+phonecall.i.score.4+canvass.i.score.5+phonecall.i.score.5+canvass.i.score.6+phonecall.i.score.6+canvass.i.score.7+phonecall.i.score.7+canvass.i.score.8+phonecall.i.score.8+canvass.i.score.9+phonecall.i.score.9+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n5p,family=binomial(link="logit"))

mf1 <- model.frame(glout1p)
mf.vec.c <- mf.vec.t <- apply(mf1,2,median)

pro.vec.all <- table(n1$turnout.score.c)/dim(n1)[1]
pro.vec <- pro.vec.all[2:10]

mf.vec.t[2] <- 1
mf.vec.c[2] <- 0

mf.vec.t[3] <- mf.vec.c[3] <- 0

mf.vec.t[names(mf.vec.t) %in% c("canvass.i.score.1","canvass.i.score.2","canvass.i.score.3","canvass.i.score.4","canvass.i.score.5","canvass.i.score.6","canvass.i.score.7","canvass.i.score.8","canvass.i.score.9")] <- pro.vec
mf.vec.c[names(mf.vec.c) %in% c("canvass.i.score.1","canvass.i.score.2","canvass.i.score.3","canvass.i.score.4","canvass.i.score.5","canvass.i.score.6","canvass.i.score.7","canvass.i.score.8","canvass.i.score.9")] <- rep(0,9)

set.seed(11201)
betas1p <- mvrnorm(M,mu=summary(glout1p)$coef[,1],Sigma=vcov(glout1p))
sims.t1p <- 1/(1+exp(-betas1p%*%mf.vec.t))
sims.c1p <- 1/(1+exp(-betas1p%*%mf.vec.c))
fd1p <- sims.t1p-sims.c1p
quantile(fd1p,c(0.025,.5,.975))

betas2p <- mvrnorm(M,mu=summary(glout2p)$coef[,1],Sigma=vcov(glout2p))
sims.t2p <- 1/(1+exp(-betas2p%*%mf.vec.t))
sims.c2p <- 1/(1+exp(-betas2p%*%mf.vec.c))
fd2p <- sims.t2p-sims.c2p

betas3p <- mvrnorm(M,mu=summary(glout3p)$coef[,1],Sigma=vcov(glout3p))
sims.t3p <- 1/(1+exp(-betas3p%*%mf.vec.t))
sims.c3p <- 1/(1+exp(-betas3p%*%mf.vec.c))
fd3p <- sims.t3p-sims.c3p

betas4p <- mvrnorm(M,mu=summary(glout4p)$coef[,1],Sigma=vcov(glout4p))
sims.t4p <- 1/(1+exp(-betas4p%*%mf.vec.t))
sims.c4p <- 1/(1+exp(-betas4p%*%mf.vec.c))
fd4p <- sims.t4p-sims.c4p

betas5p <- mvrnorm(M,mu=summary(glout5p)$coef[,1],Sigma=vcov(glout5p))
sims.t5p <- 1/(1+exp(-betas5p%*%mf.vec.t))
sims.c5p <- 1/(1+exp(-betas5p%*%mf.vec.c))
fd5p <- sims.t5p-sims.c5p

#### first differences, full data set imputation
fd.totp <- c(fd1p,fd2p,fd3p,fd4p,fd5p)
quantile(fd.totp,c(0.025,.5,.975))
#       2.5%          50%        97.5% 
#-0.030259045 -0.018072234 -0.006042126 


##### low-turnout respondents

n1p <- n1[ dta$turnout.score.c < 3,] 
n2p <- n2[ dta$turnout.score.c < 3,] 
n3p <- n3[ dta$turnout.score.c < 3,] 
n4p <- n4[ dta$turnout.score.c < 3,] 
n5p <- n5[ dta$turnout.score.c < 3,] 

glout1p <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n1p,family=binomial(link="logit"))
glout2p <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n2p,family=binomial(link="logit"))
glout3p <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n3p,family=binomial(link="logit"))
glout4p <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n4p,family=binomial(link="logit"))
glout5p <- glm(obama ~  canvass+phonecall+mail+canvass.i.score.1+phonecall.i.score.1+canvass.i.score.2+q_supportscore+turnout.score.c+male+q_age+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=n5p,family=binomial(link="logit"))

mf1 <- model.frame(glout1p)
mf.vec.c <- mf.vec.t <- apply(mf1,2,median)

pro.vec.all <- table(n1p$turnout.score.c)/dim(n1p)[1]
pro.vec <- pro.vec.all[2:3]

mf.vec.t[2] <- 1
mf.vec.c[2] <- 0

mf.vec.t[3] <- mf.vec.c[3] <- 0

mf.vec.t[names(mf.vec.t) %in% c("canvass.i.score.1","canvass.i.score.2")] <- pro.vec
mf.vec.c[names(mf.vec.c) %in% c("canvass.i.score.1","canvass.i.score.2")] <- rep(0,2)

set.seed(02138)
betas1p <- mvrnorm(M,mu=summary(glout1p)$coef[,1],Sigma=vcov(glout1p))
sims.t1p <- 1/(1+exp(-betas1p%*%mf.vec.t))
sims.c1p <- 1/(1+exp(-betas1p%*%mf.vec.c))
fd1p <- sims.t1p-sims.c1p
quantile(fd1p,c(0.025,.5,.975))

betas2p <- mvrnorm(M,mu=summary(glout2p)$coef[,1],Sigma=vcov(glout2p))
sims.t2p <- 1/(1+exp(-betas2p%*%mf.vec.t))
sims.c2p <- 1/(1+exp(-betas2p%*%mf.vec.c))
fd2p <- sims.t2p-sims.c2p

betas3p <- mvrnorm(M,mu=summary(glout3p)$coef[,1],Sigma=vcov(glout3p))
sims.t3p <- 1/(1+exp(-betas3p%*%mf.vec.t))
sims.c3p <- 1/(1+exp(-betas3p%*%mf.vec.c))
fd3p <- sims.t3p-sims.c3p

betas4p <- mvrnorm(M,mu=summary(glout4p)$coef[,1],Sigma=vcov(glout4p))
sims.t4p <- 1/(1+exp(-betas4p%*%mf.vec.t))
sims.c4p <- 1/(1+exp(-betas4p%*%mf.vec.c))
fd4p <- sims.t4p-sims.c4p

betas5p <- mvrnorm(M,mu=summary(glout5p)$coef[,1],Sigma=vcov(glout5p))
sims.t5p <- 1/(1+exp(-betas5p%*%mf.vec.t))
sims.c5p <- 1/(1+exp(-betas5p%*%mf.vec.c))
fd5p <- sims.t5p-sims.c5p

#### first differences, full data set imputation
fd.totp <- c(fd1p,fd2p,fd3p,fd4p,fd5p)
quantile(fd.totp,c(0.025,.5,.975))
#       2.5%         50%       97.5% 
#-0.05160580 -0.03676320 -0.01266744 

#####
#####

dta.sv5$WeakMatch <- 1*(dta.sv5$q_phonematchscore %in% c("Weak Match","Restricted Number - Weak Match"))
dta.sv5$MedMatch <- 1*(dta.sv5$q_phonematchscore %in% c("Medium Match","Restricted Number - Medium Match"))
dta.sv5$StrongMatch <- 1*(dta.sv5$q_phonematchscore %in% c("Restricted Number - Strong Match","Strong Match"))

# Estimate non-parametric selection model
# First generate propensity

PropensProbit = glm(survey    ~     WeakMatch + MedMatch + StrongMatch    + q_supportscore         + male                 + q_age    + ncec_dem_perf        + black             + hispanic + protestant +
    catholic         + q_medianhhincome         + q_percentcollegegrads + turnout.score.c     +
    canvass.i.score.1     + canvass.i.score.2     + canvass.i.score.3     + canvass.i.score.4     + canvass.i.score.5 +
    canvass.i.score.6     + canvass.i.score.7     + canvass.i.score.8     + canvass.i.score.9     +
    phonecall.i.score.1 + phonecall.i.score.2     + phonecall.i.score.3     + phonecall.i.score.4      + phonecall.i.score.5  +
    phonecall.i.score.6 + phonecall.i.score.7      + phonecall.i.score.8     + phonecall.i.score.9     +
    canvass         + phonecall     + mail     , na.action = na.exclude,
                    data = dta.sv5, family = binomial(link = "probit"))
summary(PropensProbit )
dta.sv5$Propensity  <- fitted(PropensProbit)        # includes NA's b/c of na.action = na.exclude command
dta.sv5$PropensitySq  <- fitted(PropensProbit )^2

ObamaWithPropensityNoIntera= lm(obama~
    q_supportscore     + male                 + q_age    + ncec_dem_perf        + black             + hispanic + protestant +
    catholic         + q_medianhhincome         + q_percentcollegegrads     +
    turnout.score.c     +
    Propensity + PropensitySq +
    canvass         + phonecall     + mail                 , data = dta.sv5, na.action = na.exclude )        # turnout.score.c +
summary(ObamaWithPropensityNoIntera)

ObamaWithPropensityNonVotersNoIntera= lm(obama~
    q_supportscore     + male                 + q_age    + ncec_dem_perf        + black             + hispanic + protestant +
    catholic         + q_medianhhincome         + q_percentcollegegrads     +
    turnout.score.c     +
    Propensity + PropensitySq +
    canvass         + phonecall     + mail                 , data = dta.sv5[dta.sv5$turnout.score.c <3,], na.action = na.exclude )     # turnout.score.c +
summary(ObamaWithPropensityNonVotersNoIntera)
#apsrtable(ObamaWithPropensityNoIntera, ObamaWithPropensityNonVotersNoIntera, digits=4, stars = "default" )
apsrtable(ObamaWithPropensityNoIntera, ObamaWithPropensityNonVotersNoIntera, digits=4, stars =c(0.05) )

## Heckman selection model


dta.sv5b <- dta[! dta$svy_result %in% c(        "21 Do not call","92 Invalid","90 Not in service","86 Tri-Tone","35 Privacy Manager"),]

dta.sv5b$WeakMatch <- 1*(dta.sv5b$q_phonematchscore %in% c("Weak Match","Restricted Number - Weak Match"))
dta.sv5b$MedMatch <- 1*(dta.sv5b$q_phonematchscore %in% c("Medium Match","Restricted Number - Medium Match"))
dta.sv5b$StrongMatch <- 1*(dta.sv5b$q_phonematchscore %in% c("Restricted Number - Strong Match","Strong Match"))




HeckResults1MLE <- selection(survey     ~ canvass + phonecall + mail + WeakMatch + MedMatch + StrongMatch ,
                obama            ~ canvass + phonecall + mail , data = dta.sv5b, method = "ml")    #dta.sv5 OR dta
summary(HeckResults1MLE)

HeckResults1a <- selection(survey     ~ canvass + phonecall + mail + WeakMatch + MedMatch + StrongMatch         + q_supportscore + ncec_dem_perf + male + black + hispanic,
                obama            ~ canvass + phonecall + mail +        + q_supportscore + ncec_dem_perf + male + black + hispanic, data = dta.sv5b)    #dta.sv5 OR dta
summary(HeckResults1a)

HeckResults1Limited <- selection(survey     ~ canvass + phonecall + mail + q_supportscore + 
                ncec_dem_perf + male + black + hispanic + WeakMatch + MedMatch + StrongMatch ,
                obama                ~ canvass + phonecall + mail + q_supportscore + ncec_dem_perf + male + black + hispanic, data = dta.sv5b[dta.sv5b$turnout.score.c<3,])
summary(HeckResults1Limited)

#####
#####
#####

##### WEIGHTING

dta.sub <- subset(dta,select=c("survey","canvass","canvass.i.score.1","canvass.i.score.2","canvass.i.score.3","canvass.i.score.4","canvass.i.score.5","canvass.i.score.6","canvass.i.score.7","canvass.i.score.8","canvass.i.score.9","phonecall.i.score.1","phonecall.i.score.2","phonecall.i.score.3","phonecall.i.score.4","phonecall.i.score.5","phonecall.i.score.6","phonecall.i.score.7","phonecall.i.score.8","phonecall.i.score.9","turnout.score.1","turnout.score.2","turnout.score.3","turnout.score.4","turnout.score.5","turnout.score.6","turnout.score.7","turnout.score.8","turnout.score.9","mail","phonecall","q_supportscore","male","ncec_dem_perf","black","hispanic","protestant","catholic","q_medianhhincome","q_percentcollegegrads","obama","mccain"))

pout <- glm(survey ~ canvass+canvass.i.score.1+canvass.i.score.2+canvass.i.score.3+canvass.i.score.4+canvass.i.score.5+canvass.i.score.6+canvass.i.score.7+canvass.i.score.8+canvass.i.score.9+phonecall.i.score.1+phonecall.i.score.2+phonecall.i.score.3+phonecall.i.score.4+phonecall.i.score.5+phonecall.i.score.6+phonecall.i.score.7+phonecall.i.score.8+phonecall.i.score.9+
turnout.score.1+turnout.score.2+turnout.score.3+turnout.score.4+turnout.score.5+turnout.score.6+turnout.score.7+turnout.score.8+turnout.score.9+mail+phonecall+q_supportscore+male+ncec_dem_perf+black+hispanic+protestant+catholic+q_medianhhincome+q_percentcollegegrads,data=dta.sub)

p1 <- predict(pout,newdata=dta.sub)

dta.sub$propensity1 <- p1

dta.sub$inv.propensity1 <- NA
dta.sub$inv.propensity1[dta.sub$survey==0] <- 1/(1-dta.sub$propensity1[dta.sub$survey==0])
dta.sub$inv.propensity1[dta.sub$survey==1] <- 1/(dta.sub$propensity1[dta.sub$survey==1])

lout1 <- lm(obama ~ canvass+phonecall+mail,weights=inv.propensity1,data=dta.sub)
set.seed(19104)
M <- 100000
x1 <- rnorm(M,mean=summary(lout1)$coef[2,1],sd=summary(lout1)$coef[2,2])
quantile(x1,c(0.025,.5,.975))











